*SM-E11

set scheme stcolor
version 17
********************************************************************************
* HYPOTHESIS-LEVEL ANALYSIS
********************************************************************************

	use "../Metadata/tess_analysisdata.dta", clear

	cap drop hyp_true
	gen hyp_true=1 if twop<0.05 & rightdir=="yes"
	replace hyp_true=1 if twop<0.05 & rightdir==""
	replace hyp_true=0 if twop>=0.05 
	replace hyp_true=0 if twop<0.05 & rightdir=="no"
	
* make variables	
		recode title (1 2 6=0) (3/5=1), gen(faculty)
		lab var faculty "Faculty PI"

		gen polsci = (discipline == 5)
		lab var polsci "Political science"

		recode numauthors (1=0) (2/10=1), gen(authors2)
		lab var authors2 "Co-authored"

		recode check_type (0=0) (1/3=1), gen(checks)
		lab var checks "Any checks"

		recode num_measures (1=0) (2/50=1), gen(multiitem)
		lab var multiitem "Multiple items"
		
		recode outcome_categories (2=1) (3/8=0), gen(binarydv)
		lab var binarydv "Binary DV"

		recode outcome_categories (2/7=0) (8=1), gen(continuousdv)
		lab var continuousdv "Continuous DV"			
		
	* relabel var for length
		lab var type_framing "Framing exp."
		lab var type_priming "Priming exp."
		lab var type_vignette "Vignette exp."
		lab var type_questwording "Question-wording exp."
		lab var type_information "Information provision exp."
		lab var type_beliefelicit "Belief elicitation exp."
		lab var type_writing "Writing treatment"
		lab var type_conj_dce "Conjoint/Discrete choice exp."
		
* keep insample obs
		keep if insample==1

* set random sort order
		set seed 10101
		gen random=runiform()
		sort random
			
* how many variables? (using 300 iterations)
		gen outofbagerror2=.
		gen validationerror2=.
		gen nvars=.

		local j=0
		forval i=1(1)26 {
			local j=`j'+1
			
		rforest hyp_true ///
			N_person ///
			medtime15  ///
			type_beliefelicit type_conj_dce type_framing type_information ///
			type_priming type_questwording type_vignette type_writing ///
			mediation moderation hyp_t hyp_ks hyp_default ///
			woman ///
			faculty polsci shortstudy ///
			pilot clarity ///
			checks ///	
			binarydv continuousdv multiitem ///
			authors2 ///
			panel ///
			in 1/506, ///
			 iter(300) type(class) numvars(`i')
			 
			 
			 qui replace nvars= `i' in `j'
			 qui replace outofbagerror2= `e(OOB_Error)' in `j'
			 predict p in 507/1012
			 qui replace validationerror2 = `e(error_rate)' in `j'
			 drop p
		}


			label var outofbagerror2 "Out of Bag Error"
			label var nvars "No. of variables"
			label var validationerror2 "Validation Error"
			scatter outofbagerror2 nvars, mcolor(blue) msize(tiny) || ///
			scatter validationerror2 nvars, mcolor(red) msize(tiny)
			
		// assess lowest validation error rate	
		cap frame drop mydata 
		frame put validationerror2 outofbagerror2 nvars, into(mydata)  
		frame mydata { 
			gen totalerror=validationerror2 +outofbagerror2
			sort totalerror, stable 
			local min_val_err = validationerror2[1] 
			local min_nvars = nvars[1] 
			} 

		di "Minimum error ="  `min_val_err' 
		di "Corresponding number of variables =" `min_nvars' 

	
		frame drop mydata 
	
* estimate hyp-level model
		rforest hyp_true ///
			N_person ///
			medtime15  ///
			type_beliefelicit type_conj_dce type_framing type_information ///
			type_priming type_questwording type_vignette type_writing ///
			mediation moderation hyp_t hyp_ks hyp_default ///
			woman ///
			faculty polsci shortstudy ///
			pilot clarity ///
			checks ///	
			binarydv continuousdv multiitem ///
			authors2 ///
			panel ///
			in 1/506, ///
			iter(300) type(class) numvars(6)
		di e(OOB_Error)  
		cap drop p
		predict p in 507/1012
		di e(error_rate) 

* variable importance matrix
		matrix importance = e(importance)
		matrix list importance

		// create a variable with importance scores from matrix
		cap drop importance
		svmat importance

		// create a variable with row names from matrix
		cap drop importid
		generate importid= ""
		local names: rownames importance 
		local k: word count `names'
		if `k'>_N {
			set obs `k'
		}
		forval i=1(1)`k' {
			local aword: word `i' of `names'
			local alabel: var label `aword'
			if ("`alabel'"!="") qui replace importid = "`alabel'" in `i'
			else qui replace importid= "`aword'" in `i'
		}



// to add directional arrows: create a variable with correlations of var with success
		correl hyp_true ///
			N_person ///
			medtime15  ///
			type_beliefelicit type_conj_dce type_framing type_information ///
			type_priming type_questwording type_vignette type_writing ///
			mediation moderation hyp_t hyp_ks hyp_default ///
			woman ///
			faculty polsci shortstudy ///
			pilot clarity ///
			checks ///	
			binarydv continuousdv multiitem ///
			authors2 ///
			panel 
	

		matrix corr = r(C)
		matrix list corr
		matselrc corr corr1, r(2/28) c(1/1)
		matrix list corr1

		// create a new variable with correlations from matrix
		cap drop corr1
		svmat corr1

		cap drop direction
		gen direction=""
		replace direction=" ({&uarr})" if corr11>0 & corr11!=.
		replace direction=" ({&darr})" if corr11<0 & corr11!=.

		// concat importid and direction
		cap drop labels
		egen labels = concat(importid direction)


* Hyp level importance score chart
		graph hbar (mean) importance1 if importance1!=., ///
		over(labels, sort(1) descending label(labsize(3.3)) axis(noline)) ///
		bar(1, fcolor(stc1*.6) lc(none)) ///
		ytitle(importance1) ///
		ytitle("Variable importance score", size(3.2) margin(l=-8)) ///
		ylabel(, labsize(3.3) nogrid) ///
		graphregion(color(white) margin(l=1)) scheme(s2mono) ///
		title("{it: Hypothesis-level support}" ///
		, span) ///
		name(RFhyp, replace) 

********************************************************************************		
* STUDY LEVEL ANALYSIS
********************************************************************************
		
use "../Metadata/tess_analysisdata.dta", clear

	cap drop hyp_true hyp_true_insample hyp_anytrue_insample total_hyp_true_insample hyp_10pct_supp_insample successfulexp_insample
	
	gen hyp_true=1 if twop<0.05 & rightdir=="yes"
	replace hyp_true=1 if twop<0.05 & rightdir==""
	replace hyp_true=0 if twop>=0.05 
	replace hyp_true=0 if twop<0.05 & rightdir=="no"
	

* at least one hypothesis was supported in the study
	* true hypotheses insample
	gen hyp_true_insample= hyp_true if insample==1

	egen hyp_anytrue_insample=max(hyp_true_insample), by(vendor_id)
	lab var hyp_anytrue_insample "At least 1 hypothesis supported"


* at least 10% of hypotheses supported 
	// total hypotheses supported 
	egen total_hyp_true_insample=sum(hyp_true_insample), by(vendor_id)

	gen temp=total_hyp_insample*0.1
	gen hyp_10pct_supp_insample=1 if total_hyp_true_insample>=temp & total_hyp_true_insample!=.
	replace hyp_10pct_supp_insample=0 if total_hyp_true_insample<temp & temp!=.
	drop temp
	sum hyp_10pct_supp_insample
	lab var hyp_10pct_supp_insample "At least 10% hypotheses supported"	
	
* successful experiment (at least 1 hyp true if #hyp<=10; or 10% hyp true if #hyp>10)	
	gen successfulexp_insample=hyp_anytrue_insample if total_hyp_insample<=10
	replace successfulexp_insample=hyp_10pct_supp_insample if total_hyp_insample>10

	lab var successfulexp_insample "Successful study"	
	

* make variables
		recode title (1 2 6=0) (3/5=1), gen(faculty)
		lab var faculty "Faculty PI"

		gen polsci = (discipline == 5)
		lab var polsci "Political science"

		recode numauthors (1=0) (2/10=1), gen(authors2)
		lab var authors2 "Co-authored"

		recode check_type (0=0) (1/3=1), gen(checks)
		lab var checks "Any checks"


		* relabel var for length
			lab var type_framing "Framing exp."
			lab var type_priming "Priming exp."
			lab var type_vignette "Vignette exp."
			lab var type_questwording "Question-wording exp."
			lab var type_information "Information provision exp."
			lab var type_beliefelicit "Belief elicitation exp."
			lab var type_writing "Writing treatment"
			lab var type_conj_dce "Conjoint/Discrete choice exp."
			

			
		* average level of hyp-level vars
			* at least one binary DV
			recode outcome_categories (2=1) (3/8=0), gen(bin)
			egen binarydv= mean(bin), by(vendor_id)
			lab var binarydv "Binary DV (at least 1)"

			recode outcome_categories (2/7=0) (8=1), gen(cont)
			egen continuousdv= mean(cont), by(vendor_id)
			lab var continuousdv "Continuous DV (at least 1)"				
			
			
			* number of items used to measure DV, on average across the study
			egen avgnummeasures= mean(num_measures), by(vendor_id)
			forval i=1/5 {
			lab var avgnummeasures "Avg. # of items per DV"
			}
			
			* whether the study had any of these types of hypotheses
			tab hyp_type, gen(hypcat)
			forval i=1/5 {	
			egen  hypcatany`i'=max(hypcat`i'), by(vendor_id)	
			}
			lab var hypcatany1 "Default treat. comp. (at least 1)"
			lab var hypcatany2 "Moderation test (at least 1)"
			lab var hypcatany3 "Mediation test (at least 1)"
			lab var hypcatany4 "t-test (at least 1)"
			lab var hypcatany5 "KS-test (at least 1)"	

			lab var medtime15 "Median duration (mins.)"
			
			
		* study-level analysis	
		keep if hyp_num==1	
	
* set seed
		set seed 10101
		generate u = runiform()
		sort u


* how many variables at a time? (using 400 iterations)
		gen outofbagerror2=.
		gen validationerror2=.
		gen nvars=.

		local j=0
		forval i=1(1)25 {
			local j=`j'+1
			
		rforest successfulexp_insample ///
			samplesize ///
			medtime15  ///
			type_beliefelicit type_conj_dce type_framing type_information ///
			type_priming type_questwording type_vignette type_writing ///
			hypcatany* ///
			woman ///
			faculty polsci shortstudy ///
			pilot clarity ///
			checks ///
			binarydv continuousdv avgnummeasures ///
			authors2 ///
			in 1/60, ///
			 iter(400) type(class) numvars(`i')
			 
			 
			 qui replace nvars= `i' in `j'
			 qui replace outofbagerror2= `e(OOB_Error)' in `j'
			 predict p in 61/100
			 qui replace validationerror2 = `e(error_rate)' in `j'
			 drop p
		}


			label var outofbagerror2 "Out of Bag Error"
			label var nvars "No. of variables"
			label var validationerror2 "Validation Error"
			scatter outofbagerror2 nvars, mcolor(blue) msize(tiny) || ///
			scatter validationerror2 nvars, mcolor(red) msize(tiny)


		// assess lowest validation error rate	
		cap frame drop mydata 
		frame put validationerror2 outofbagerror2 nvars, into(mydata)  
		frame mydata { 
			gen totalerror=validationerror2 +outofbagerror2
			sort totalerror, stable 
			local min_val_err = validationerror2[1] 
			local min_nvars = nvars[1] 
			} 

		di "Minimum error ="  `min_val_err' 
		di "Corresponding number of variables =" `min_nvars' 
		// 2
			
		cap frame drop mydata 

* estimate model
		rforest successfulexp_insample ///
			samplesize ///
			medtime15  ///
			type_beliefelicit type_conj_dce type_framing type_information ///
			type_priming type_questwording type_vignette type_writing ///
			hypcatany* ///
			woman ///
			faculty polsci shortstudy ///
			pilot clarity ///
			checks ///
			binarydv continuousdv avgnummeasures ///
			authors2 ///
			, ///
			iter(400) type(class) numvars(2)
		di e(OOB_Error) 
		cap drop p
		predict p
		di e(error_rate) // there is no error, because all data is used



* variable importance matrix
		matrix importance = e(importance)
		matrix list importance

		// create a variable with importance scores from matrix
		cap drop importance
		svmat importance

		// create a variable with row names from matrix
		cap drop importid
		generate importid= ""
		local names: rownames importance 
		local k: word count `names'
		if `k'>_N {
			set obs `k'
		}
		forval i=1(1)`k' {
			local aword: word `i' of `names'
			local alabel: var label `aword'
			if ("`alabel'"!="") qui replace importid = "`alabel'" in `i'
			else qui replace importid= "`aword'" in `i'
		}


// to add directional arrows: create a variable with correlations of var with success
		correl successfulexp_insample ///
			samplesize ///
			medtime15  ///
			type_beliefelicit type_conj_dce type_framing type_information ///
			type_priming type_questwording type_vignette type_writing ///
			hypcatany* ///
			woman ///
			faculty polsci shortstudy ///
			pilot clarity ///
			checks ///
			binarydv continuousdv avgnummeasures ///
			authors2 
		

			matrix corr = r(C)
			matrix list corr
			matselrc corr corr1, r(2/27) c(1/1)
			matrix list corr1

			// create a new variable with correlations from matrix
			cap drop corr1
			svmat corr1

			cap drop direction
			gen direction=""
			replace direction=" ({&uarr})" if corr11>0 & corr11!=.
			replace direction=" ({&darr})" if corr11<0 & corr11!=.

			// concat importid and direction
			cap drop labels
			egen labels = concat(importid direction)



* Importance score chart
		graph hbar (mean) importance1, ///
		over(labels, sort(1) descending label(labsize(3.3)) axis(noline)) ///
		bar(1, fcolor(stc2*.6) lc(none)) ///
		ytitle(importance1) ///
		ytitle("Variable importance score", size(3.2) margin(l=-8)) ///
		ylabel(, labsize(3.3) nogrid) ///
		graphregion(color(white)) scheme(stcolor) ///
		title("{it: Study-level positive result}", span) ///
		name(RFstudy, replace)


* COMBINE GRAPHS		
graph combine RFhyp RFstudy, ///
graphregion(color(white)) scheme(s2mono) 

graph export "../Results/SM-E11-Figure-RandomForest_nobonf.pdf", replace
